Required Packages

library(tidyverse)
library(minfi)
library(limma)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
library(minfi)
library(DMRcate)

# call helpers
source("../helper-scripts/DMP-utility-functions.R")
source("../helper-scripts/plotting-functions.R")
source("../helper-scripts/probe-annotation.R")
source("../helper-scripts/shared-helpers.R")

# knitr settings
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

Load Data

# Load Data
load(
  "../../arsenic-epigenetics-meta/Chile/buccal/data/buccal_funnorm_data.RData"
)

pheno <- as.data.frame(pheno)

# load ReFACTor output, join with pheno
refactor_components <- read_tsv(
  "preprocessing-reports/refactor_buccal.out.components.txt",
  col_names = FALSE
)
colnames(refactor_components) <- paste0("refactor", seq_len(6))
pheno <- cbind(pheno, refactor_components)

Exposure Counts

# Look at exposure distribution
exp_dist_p <- pheno %>% ggplot(aes(exposed)) +
  geom_histogram(alpha = 0.7, stat = "count", binwidth = 50) +
  xlab("Exposed?") +
  ylab("Count") +
  ggtitle("Distribution of Arsenic Exposure") +
  theme_minimal()

ggsave(filename = "EWAS-plots/arsenic-exp-dist.png", device = "png", dpi = 300)
exp_dist_p

Fit Models

# Unadjusted model matrix
modUnadj <- model.matrix(~pheno$exposed)
                 
# Adjusted for cell type
modCell <- model.matrix(~pheno$exposed + pheno$refactor1 + pheno$refactor2 +
                        pheno$refactor3 + pheno$refactor4 + pheno$refactor5 +
                        pheno$refactor6)
                   
# Adjusted for age, smoking, and sex
modAgeSMSex <- model.matrix(~pheno$exposed + pheno$age + pheno$smoking +
                            pheno$sex)

# Adjusted for age, smoking, sex, anc cell composition
modCellAgeSMSex <- model.matrix(~pheno$exposed + pheno$age + pheno$smoking +
                                pheno$sex + pheno$refactor1 + pheno$refactor2 +
                                pheno$refactor3 + pheno$refactor4 +
                                pheno$refactor5 + pheno$refactor6)

# Unadjusted
probeUnadj <- run_DMP(mvals = mvals_fun, design = modUnadj)

# no singificant CpG sites with BH correction
table(probeUnadj$P.Value < 0.05)
table(probeUnadj$adj.P.Val < 0.05)

# write sites with singificant raw p-values
write.table(
  probeUnadj,
  file = "EWAS-results/unadjusted.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# plot results
gg_qqplot(probeUnadj$P.Value)
quartz.save("EWAS-plots/qq_log_unadjusted.png", type = "png", dpi = 300)

volcano(probeUnadj)
quartz.save("EWAS-plots/vol_log_unadjusted.png", type = "png", dpi = 300)

manhattan(probeUnadj, Locations)
quartz.save("EWAS-plots/man_log_unadjusted.png", type = "png", dpi = 300)


# Adjusted for cell type
probeCell <- run_DMP(mvals = mvals_fun, design = modCell)

# no singificant CpG sites with BH correction
table(probeCell$P.Value < 0.05)
table(probeCell$adj.P.Val < 0.05)

# save the results
write.table(
  probeCell,
  file = "EWAS-results/cell.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# save the results
gg_qqplot(probeCell$P.Value)
quartz.save("EWAS-plots/qq_log_cell.png", type = "png", dpi = 300)

volcano(probeCell)
quartz.save("EWAS-plots/vol_log_cell.png", type = "png", dpi = 300)

manhattan(probeCell, Locations)
quartz.save("EWAS-plots/man_log_cell.png", type = "png", dpi = 300)


# Adjusted for age, smoking, and sex
probeAgeSmSex <- run_DMP(mvals = mvals_fun, design = modAgeSMSex)
   
# no singificant CpG sites with BH correction
table(probeAgeSmSex$P.Value < 0.05)
table(probeAgeSmSex$adj.P.Val < 0.05)

# save the results
write.table(
  probeAgeSmSex,
  file = "EWAS-results/age-sex-smoking.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# plot results
gg_qqplot(probeAgeSmSex$P.Value)
quartz.save("EWAS-plots/qq_log_sex-age-smoking.png", type = "png", dpi = 300)

volcano(probeAgeSmSex)
quartz.save("EWAS-plots/vol_log_sex-age-smoking.png", type = "png", dpi = 300)

manhattan(probeAgeSmSex, Locations)
quartz.save("EWAS-plots/man_log_sex-age-smoking.png", type = "png", dpi = 300)


# Adjusted for cell type, age, smoking, and sex
probeCellAgeSmSex <- run_DMP(mvals = mvals_fun, design = modCellAgeSMSex)
   
# no singificant CpG sites with BH correction
table(probeCellAgeSmSex$P.Value < 0.05)
table(probeCellAgeSmSex$adj.P.Val < 0.05)

# save the results
write.table(
  probeCellAgeSmSex,
  file = "EWAS-results/cell-age-sex-smoking.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# plot results
gg_qqplot(probeCellAgeSmSex$P.Value)
quartz.save("EWAS-plots/qq_log_cell-sex-age-smoking.png", type = "png",
            dpi = 300)

volcano(probeCellAgeSmSex)
quartz.save("EWAS-plots/vol_log_cell-sex-age-smoking.png", type = "png",
            dpi = 300)

manhattan(probeCellAgeSmSex, Locations)
quartz.save("EWAS-plots/man_log_cell-sex-age-smoking.png", type = "png",
            dpi = 300)

QQ Plot – Unadjusted

knitr::include_graphics("EWAS-plots/qq_log_unadjusted.png")

QQ Plot – Exposure Adjusted for Age, Sex, and Smoking Status

This model doesn’t appear to be accurate.

knitr::include_graphics("EWAS-plots/qq_log_sex-age-smoking.png")

Volcano Plot – Exposure Adjusted for Age, Sex, and Smoking Status

knitr::include_graphics("EWAS-plots/vol_log_sex-age-smoking.png")

Manhattan Plot – Exposure Adjusted for Age, Sex, and Smoking Status

knitr::include_graphics("EWAS-plots/man_log_sex-age-smoking.png")

QQ Plot – Exposure Adjusted for Age, Cells, Sex, and Smoking Status

This model appears to be accurate. However, based on this plot, there probably
won’t be any significant p-value after controlling for multiple testing.

knitr::include_graphics("EWAS-plots/qq_log_cell-sex-age-smoking.png")

Volcano Plot – Exposure Adjusted for Age, Cells, Sex, and Smoking Status

knitr::include_graphics("EWAS-plots/vol_log_cell-sex-age-smoking.png")

Manhattan Plot – Exposure Adjusted for Age, Cells, Sex, and Smoking Status

knitr::include_graphics("EWAS-plots/man_log_cell-sex-age-smoking.png")